suppressPackageStartupMessages({
# General use
library(tidyverse)
library(knitr)
library(magrittr)
# Data formats
library(animalcules)
library(MultiAssayExperiment)
library(SummarizedExperiment)
# Modeling
library(lme4)
library(geepack)
})
## No methods found in package 'timeDate' for request: '[<-' when loading 'timeSeries'
# Read in MAE of data
OG_dat <- readRDS("data/animalculesFinalHIV.rds")
dat <- OG_dat[["MicrobeGenetics"]]
# Extract metadata for infants only
microbe <- dat[, which(dat$MothChild == "Infant")]
sam_table <- as.data.frame(colData(microbe))
# Both mothers and infants
microbe_mi <- dat
tax_table <- as.data.frame(SummarizedExperiment::rowData(microbe))
# Extract metadata, taxonomic info, and counts
tax_table_mi <- as.data.frame(SummarizedExperiment::rowData(microbe_mi))
sam_table_mi <- as.data.frame(SummarizedExperiment::colData(microbe_mi))
counts_table_mi <- as.data.frame(SummarizedExperiment::assay(
microbe_mi, "MGX"))[, rownames(sam_table_mi)]
The following statistics are calculated specifically for the infants in this study (HIV-E vs control infants).
sam_table_mi %>%
filter(MothChild == "Infant") %>%
dplyr::group_by(timepoint) %>%
dplyr::summarise("Average Infant Age" = round(mean(Age)),
"SD of Infant Age" = round(sd(Age)))
## # A tibble: 7 × 3
## timepoint `Average Infant Age` `SD of Infant Age`
## <int> <dbl> <dbl>
## 1 0 7 1
## 2 1 22 2
## 3 2 42 3
## 4 3 58 2
## 5 4 74 2
## 6 5 88 2
## 7 6 105 2
sam_table_mi %>%
dplyr::rename("HIV Status" = HIVStatus) %>%
mutate("Timepoint" = factor(timepoint)) %>%
table1::table1(~ Timepoint + `HIV Status` | factor(MothChild),
data = .)
| Infant (N=129) |
Mother (N=38) |
Overall (N=167) |
|
|---|---|---|---|
| Timepoint | |||
| 0 | 20 (15.5%) | 20 (52.6%) | 40 (24.0%) |
| 1 | 20 (15.5%) | 0 (0%) | 20 (12.0%) |
| 2 | 19 (14.7%) | 0 (0%) | 19 (11.4%) |
| 3 | 17 (13.2%) | 0 (0%) | 17 (10.2%) |
| 4 | 17 (13.2%) | 0 (0%) | 17 (10.2%) |
| 5 | 18 (14.0%) | 0 (0%) | 18 (10.8%) |
| 6 | 18 (14.0%) | 18 (47.4%) | 36 (21.6%) |
| HIV Status | |||
| Control | 68 (52.7%) | 19 (50.0%) | 87 (52.1%) |
| HIV | 61 (47.3%) | 19 (50.0%) | 80 (47.9%) |
Looking at reads
howmanyreads <- function(mothinf) {
counts_table_mi %>%
as_tibble() %>%
mutate(species = tax_table_mi$species) %>%
relocate(species) %>%
pivot_longer(cols = starts_with("X"),
names_to = "Sample", values_to = "Abundance") %>%
left_join(., sam_table_mi, by = "Sample") %>%
# summarize
filter(MothChild == mothinf) %>%
group_by(Sample) %>%
dplyr::summarise("Total Reads" = sum(`Abundance`)) %>%
arrange(desc(`Total Reads`)) %>%
dplyr::summarise(med_reads = median(`Total Reads`),
mean_reads = mean(`Total Reads`),
sd_reads = sd(`Total Reads`),
min_reads = min(`Total Reads`),
max_reads = max(`Total Reads`),
num_total = n())
}
howmanyreads("Mother")
## # A tibble: 1 × 6
## med_reads mean_reads sd_reads min_reads max_reads num_total
## <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 54298. 70463. 48730. 14339 208029 38
howmanyreads("Infant")
## # A tibble: 1 × 6
## med_reads mean_reads sd_reads min_reads max_reads num_total
## <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 65069 124905. 299120. 13218 3016276 129
sam_table %>%
group_by(timepoint) %>%
dplyr::summarise(median(`Age`))
## # A tibble: 7 × 2
## timepoint `median(Age)`
## <int> <dbl>
## 1 0 6
## 2 1 22
## 3 2 42
## 4 3 58
## 5 4 74
## 6 5 88
## 7 6 104
# Number of mother samples
# NUmber of samples' reads
create_long <- function(input_df, sam_tab = sam_table_mi,
tax_tab = tax_table_mi) {
input_df %>%
as_tibble() %>%
mutate(species = tax_table_mi$species) %>%
relocate(species) %>%
pivot_longer(cols = starts_with("X"),
names_to = "Sample", values_to = "Abundance") %>%
left_join(., sam_tab, by = "Sample") %>%
left_join(., tax_tab, by = "species") %>%
relocate(colnames(sam_tab))
}
alluvial_df <- create_long(counts_table_mi)
# Non-limited to any groupings, all samples!
alluvial_df %>%
group_by(genus) %>%
summarise(pct = round(sum(Abundance)/sum(alluvial_df$Abundance)*100, 4)) %>%
arrange(desc(pct))
## # A tibble: 648 × 2
## genus pct
## <chr> <dbl>
## 1 Corynebacterium 3.38
## 2 Bacillus 2.78
## 3 Pseudomonas 2.18
## 4 Mycolicibacterium 1.81
## 5 Streptomyces 1.69
## 6 Acinetobacter 1.59
## 7 Staphylococcus 1.55
## 8 Paracoccus 1.45
## 9 Bradyrhizobium 1.33
## 10 Nocardioides 1.09
## # … with 638 more rows
alluvial_df %>%
group_by(phylum) %>%
summarise(pct = round(sum(Abundance)/sum(alluvial_df$Abundance)*100, 4)) %>%
arrange(desc(pct))
## # A tibble: 17 × 2
## phylum pct
## <chr> <dbl>
## 1 Proteobacteria 37.5
## 2 Actinobacteria 26.4
## 3 Firmicutes 26.0
## 4 Bacteroidetes 7.14
## 5 Chloroflexi 0.847
## 6 Fusobacteria 0.606
## 7 Nitrospirae 0.363
## 8 Acidobacteria 0.242
## 9 Cyanobacteria 0.242
## 10 Deinococcus-Thermus 0.242
## 11 Planctomycetes 0.242
## 12 Gemmatimonadetes 0.121
## 13 Synergistetes 0.121
## 14 Tenericutes 0.0005
## 15 Armatimonadetes 0
## 16 Spirochaetes 0
## 17 Verrucomicrobia 0
# Genus
diversity_beta_test(subsetByColData(OG_dat,
which(OG_dat$MothChild == "Infant" &
OG_dat$timepoint == 0)),
tax_level = "genus",
input_beta_method = "bray", #jaccard
input_select_beta_condition = "HIVStatus",
input_select_beta_stat_method = "Wilcoxon rank sum test")
## HIV and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.33452417495224"
## Control and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.100432908365082"
## Control and HIV
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.678599395713093"
diversity_beta_test(subsetByColData(OG_dat,
which(OG_dat$MothChild == "Infant" &
OG_dat$timepoint == 6)),
tax_level = "genus",
input_beta_method = "bray", #jaccard
input_select_beta_condition = "HIVStatus",
input_select_beta_stat_method = "Wilcoxon rank sum test")
## HIV and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.20814402240231"
## Control and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "1.05307939650421e-06"
## Control and HIV
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.00175509727592689"
#animalcules::diversity_beta_boxplot(subsetByColData(OG_dat,
# which(OG_dat$MothChild == "Infant")),
# tax_level = "genus",
# input_beta_method = "bray", #jaccard
# input_select_beta_condition = "HIVStatus")
# Genus
diversity_beta_test(subsetByColData(OG_dat,
which(OG_dat$MothChild == "Mother" &
OG_dat$timepoint == 0)),
tax_level = "genus",
input_beta_method = "bray", #jaccard
input_select_beta_condition = "HIVStatus",
input_select_beta_stat_method = "Wilcoxon rank sum test")
## HIV and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.826219946602891"
## Control and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.419719970507453"
## Control and HIV
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.700208637633154"
diversity_beta_test(subsetByColData(OG_dat,
which(OG_dat$MothChild == "Mother" &
OG_dat$timepoint == 6)),
tax_level = "genus",
input_beta_method = "bray", #jaccard
input_select_beta_condition = "HIVStatus",
input_select_beta_stat_method = "Wilcoxon rank sum test")
## Control and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "5.85363786404608e-07"
## HIV and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.867001372812487"
## HIV and Control
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.000101538359001655"
#animalcules::diversity_beta_boxplot(subsetByColData(OG_dat,
# which(OG_dat$MothChild == "Mother")),
# tax_level = "genus",
# input_beta_method = "bray", #jaccard
# input_select_beta_condition = "HIVStatus")
# Genus
diversity_beta_test(subsetByColData(OG_dat,
which(OG_dat$MothChild == "Infant" &
OG_dat$HIVStatus == "HIV" &
OG_dat$timepoint %in% c(0, 6))),
tax_level = "genus",
input_beta_method = "bray", #jaccard
input_select_beta_condition = "timepoint",
input_select_beta_stat_method = "Wilcoxon rank sum test")
## 6 and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "1.38490591817349e-20"
## 0 and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "2.21076524628771e-17"
## 0 and 6
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.114781658698142"
#animalcules::diversity_beta_boxplot(subsetByColData(OG_dat,
# which(OG_dat$MothChild == "Mother")),
# tax_level = "genus",
# input_beta_method = "bray", #jaccard
# input_select_beta_condition = "HIVStatus")
diversity_beta_test(subsetByColData(OG_dat,
which(OG_dat$MothChild == "Infant" &
OG_dat$HIVStatus == "Control" &
OG_dat$timepoint %in% c(0, 6))),
tax_level = "genus",
input_beta_method = "bray", #jaccard
input_select_beta_condition = "timepoint",
input_select_beta_stat_method = "Wilcoxon rank sum test")
## 6 and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "2.28981697925407e-28"
## 0 and between
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "5.69660947216888e-29"
## 0 and 6
## Method "Wilcoxon rank sum test with continuity correction"
## P-value "0.781417368229006"
obtain_gn <- function(counts_tab, tax_tab) {
all_relabu_genus <- counts_tab |>
as.matrix() |>
# Get rel abu within samokes
prop.table(margin = 2) |>
as_tibble() |>
bind_cols(genus = tax_tab$genus) |>
relocate(genus) |>
group_by(genus) |>
# Sum rel abu within samples/columns for genera
summarise(across(.fns = sum)) %>%
# Sum everything but the first columm ("genus")
mutate(allmeans = apply(.[,-1], 1, mean)) |>
select(genus, allmeans) |>
mutate(genus = replace(genus, is.na(genus), "Unknown")) |>
arrange(desc(allmeans))
all_relabu_genus |>
select(genus) |> unlist() |> unname() %>% .[1:20] %>%
return()
}
ind <- microbe_mi$MothChild == "Infant"
best_genus <- obtain_gn(counts_table_mi[, ind], tax_table)
ind <- microbe_mi$MothChild == "Mother"
best_genus_m <- obtain_gn(counts_table_mi[, ind], tax_table_mi)
# Barplot for Control
p <- relabu_barplot(subsetByColData(OG_dat,
which(OG_dat$HIVStatus == "Control" &
OG_dat$MothChild == "Infant")),
tax_level = "genus",
order_organisms = best_genus[1:20],
sort_by = "conditions",
group_samples = TRUE,
group_conditions = "timepoint",
show_legend = TRUE)
#plotly::export(p, file = "PaperFigs/RAW_barplot_control_inf.png",
# vwidth = 750, vheight = 550)
p
# Barplot for Control
p <- relabu_barplot(subsetByColData(OG_dat,
which(OG_dat$HIVStatus == "HIV" &
OG_dat$MothChild == "Infant")),
tax_level = "genus",
order_organisms = best_genus[1:20],
sort_by = "conditions",
group_samples = TRUE,
group_conditions = "timepoint",
show_legend = TRUE)
#plotly::export(p, file = "PaperFigs/RAW_barplot_HIV_inf.png",
# vwidth = 750, vheight = 550)
p
# Barplot for Control
p <- relabu_barplot(subsetByColData(OG_dat,
which(OG_dat$HIVStatus == "Control" &
OG_dat$MothChild == "Mother")),
tax_level = "genus",
order_organisms = best_genus_m[1:20],
sort_by = "conditions",
group_samples = TRUE,
group_conditions = "timepoint",
show_legend = TRUE)
#plotly::export(p, file = "PaperFigs/RAW_barplot_control_mom.png",
# vwidth = 750, vheight = 550)
p
# Barplot for Control
p <- relabu_barplot(subsetByColData(OG_dat,
which(OG_dat$HIVStatus == "HIV" &
OG_dat$MothChild == "Mother")),
tax_level = "genus",
order_organisms = best_genus_m[1:20],
sort_by = "conditions",
group_samples = TRUE,
group_conditions = "timepoint",
show_legend = TRUE)
#plotly::export(p, file = "PaperFigs/RAW_barplot_HIV_mom.png",
# vwidth = 750, vheight = 550)
p